Loading both training and testing data

Teach<-read.csv("pml-training.csv", stringsAsFactors = F,na.strings = c("","NA","#DIV/0!"))
Ythu<-read.csv("pml-testing.csv", stringsAsFactors = F,na.strings = c("","NA","#DIV/0!"))
dim(Teach); 
## [1] 19622   160
dim(Ythu)
## [1]  20 160
teach_devide <- createDataPartition(Teach$classe, p = 0.8, list = F)
Val_data <- Teach[-teach_devide,]
Teach <- Teach[teach_devide,]
dim(Teach); 
## [1] 15699   160
dim(Val_data)
## [1] 3923  160
table(Teach$classe)/nrow(Teach)
## 
##         A         B         C         D         E 
## 0.2843493 0.1935155 0.1744060 0.1638958 0.1838334
kl <- sapply(select(Teach,names(Teach)[grepl("_belt",names(Teach))]),function(x) sum(is.na(x)))
kl
##            roll_belt           pitch_belt             yaw_belt 
##                    0                    0                    0 
##     total_accel_belt   kurtosis_roll_belt  kurtosis_picth_belt 
##                    0                15391                15415 
##    kurtosis_yaw_belt   skewness_roll_belt skewness_roll_belt.1 
##                15699                15390                15415 
##    skewness_yaw_belt        max_roll_belt       max_picth_belt 
##                15699                15385                15385 
##         max_yaw_belt        min_roll_belt       min_pitch_belt 
##                15391                15385                15385 
##         min_yaw_belt  amplitude_roll_belt amplitude_pitch_belt 
##                15391                15385                15385 
##   amplitude_yaw_belt var_total_accel_belt        avg_roll_belt 
##                15391                15385                15385 
##     stddev_roll_belt        var_roll_belt       avg_pitch_belt 
##                15385                15385                15385 
##    stddev_pitch_belt       var_pitch_belt         avg_yaw_belt 
##                15385                15385                15385 
##      stddev_yaw_belt         var_yaw_belt         gyros_belt_x 
##                15385                15385                    0 
##         gyros_belt_y         gyros_belt_z         accel_belt_x 
##                    0                    0                    0 
##         accel_belt_y         accel_belt_z        magnet_belt_x 
##                    0                    0                    0 
##        magnet_belt_y        magnet_belt_z 
##                    0                    0
vk <- sapply(select(Teach,names(Teach)[grepl("_arm",names(Teach))]),function(x) sum(is.na(x)))
vk
##            roll_arm           pitch_arm             yaw_arm     total_accel_arm 
##                   0                   0                   0                   0 
##       var_accel_arm        avg_roll_arm     stddev_roll_arm        var_roll_arm 
##               15385               15385               15385               15385 
##       avg_pitch_arm    stddev_pitch_arm       var_pitch_arm         avg_yaw_arm 
##               15385               15385               15385               15385 
##      stddev_yaw_arm         var_yaw_arm         gyros_arm_x         gyros_arm_y 
##               15385               15385                   0                   0 
##         gyros_arm_z         accel_arm_x         accel_arm_y         accel_arm_z 
##                   0                   0                   0                   0 
##        magnet_arm_x        magnet_arm_y        magnet_arm_z   kurtosis_roll_arm 
##                   0                   0                   0               15449 
##  kurtosis_picth_arm    kurtosis_yaw_arm   skewness_roll_arm  skewness_pitch_arm 
##               15451               15395               15448               15451 
##    skewness_yaw_arm        max_roll_arm       max_picth_arm         max_yaw_arm 
##               15395               15385               15385               15385 
##        min_roll_arm       min_pitch_arm         min_yaw_arm  amplitude_roll_arm 
##               15385               15385               15385               15385 
## amplitude_pitch_arm   amplitude_yaw_arm 
##               15385               15385
cg <- sapply(select(Teach,names(Teach)[grepl("_forearm",names(Teach))]),function(x) sum(is.na(x)))
cg
##            roll_forearm           pitch_forearm             yaw_forearm 
##                       0                       0                       0 
##   kurtosis_roll_forearm  kurtosis_picth_forearm    kurtosis_yaw_forearm 
##                   15448                   15449                   15699 
##   skewness_roll_forearm  skewness_pitch_forearm    skewness_yaw_forearm 
##                   15447                   15449                   15699 
##        max_roll_forearm       max_picth_forearm         max_yaw_forearm 
##                   15385                   15385                   15448 
##        min_roll_forearm       min_pitch_forearm         min_yaw_forearm 
##                   15385                   15385                   15448 
##  amplitude_roll_forearm amplitude_pitch_forearm   amplitude_yaw_forearm 
##                   15385                   15385                   15448 
##     total_accel_forearm       var_accel_forearm        avg_roll_forearm 
##                       0                   15385                   15385 
##     stddev_roll_forearm        var_roll_forearm       avg_pitch_forearm 
##                   15385                   15385                   15385 
##    stddev_pitch_forearm       var_pitch_forearm         avg_yaw_forearm 
##                   15385                   15385                   15385 
##      stddev_yaw_forearm         var_yaw_forearm         gyros_forearm_x 
##                   15385                   15385                       0 
##         gyros_forearm_y         gyros_forearm_z         accel_forearm_x 
##                       0                       0                       0 
##         accel_forearm_y         accel_forearm_z        magnet_forearm_x 
##                       0                       0                       0 
##        magnet_forearm_y        magnet_forearm_z 
##                       0                       0
rj <- sapply(select(Teach,names(Teach)[grepl("_dumbbell",names(Teach))]),function(x) sum(is.na(x)))
rj
##            roll_dumbbell           pitch_dumbbell             yaw_dumbbell 
##                        0                        0                        0 
##   kurtosis_roll_dumbbell  kurtosis_picth_dumbbell    kurtosis_yaw_dumbbell 
##                    15389                    15387                    15699 
##   skewness_roll_dumbbell  skewness_pitch_dumbbell    skewness_yaw_dumbbell 
##                    15388                    15386                    15699 
##        max_roll_dumbbell       max_picth_dumbbell         max_yaw_dumbbell 
##                    15385                    15385                    15389 
##        min_roll_dumbbell       min_pitch_dumbbell         min_yaw_dumbbell 
##                    15385                    15385                    15389 
##  amplitude_roll_dumbbell amplitude_pitch_dumbbell   amplitude_yaw_dumbbell 
##                    15385                    15385                    15389 
##     total_accel_dumbbell       var_accel_dumbbell        avg_roll_dumbbell 
##                        0                    15385                    15385 
##     stddev_roll_dumbbell        var_roll_dumbbell       avg_pitch_dumbbell 
##                    15385                    15385                    15385 
##    stddev_pitch_dumbbell       var_pitch_dumbbell         avg_yaw_dumbbell 
##                    15385                    15385                    15385 
##      stddev_yaw_dumbbell         var_yaw_dumbbell         gyros_dumbbell_x 
##                    15385                    15385                        0 
##         gyros_dumbbell_y         gyros_dumbbell_z         accel_dumbbell_x 
##                        0                        0                        0 
##         accel_dumbbell_y         accel_dumbbell_z        magnet_dumbbell_x 
##                        0                        0                        0 
##        magnet_dumbbell_y        magnet_dumbbell_z 
##                        0                        0
rcb <- c(names(kl[kl!=0]), names(vk[vk!=0]),names(cg[cg!=0]),names(rj[rj!=0]))
length(rcb)
## [1] 100
foni_fg<-tbl_df(Teach%>%select(-rcb,-c(X,user_name,raw_timestamp_part_1,raw_timestamp_part_2,cvtd_timestamp,new_window,num_window)))
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(rcb)` instead of `rcb` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
foni_fg$classe<-as.factor(foni_fg$classe)
foni_fg[,1:52]<-lapply(foni_fg[,1:52],as.numeric)
dim(foni_fg)
## [1] 15699    53
o_o <- cor(select(foni_fg, -classe))
diag(o_o) <- 0
o_o <- which(abs(o_o)>0.8,arr.ind = T)
o_o <- unique(row.names(o_o))
corrplot(cor(select(foni_fg,o_o)),type="upper",order="hclust",method="number")
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(o_o)` instead of `o_o` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

zxn <-foni_fg%>%binarize(n_bins=4,thresh_infreq=0.01)
ms <- zxn %>% correlate(target=classe__A) 
ar<-zxn%>%correlate(target=classe__B)
ws <- zxn%>%correlate(target=classe__C)
iu<-zxn%>%correlate(target=classe__D)
hj<-zxn %>% correlate(target = classe__E)
a_pol <- c("magnet_arm_x", "pitch_forearm" , "magnet_dumbbell_y", "roll_forearm", "gyros_dumbbell_y") 
b_pol <- c("magnet_dumbbell_y", "magnet_dumbbell_x" , "roll_dumbbell" , 
           "magnet_belt_y" , "accel_dumbbell_x" )
c_pol <- c("magnet_dumbbell_y", "roll_dumbbell" , "accel_dumbbell_y" , 
           "magnet_dumbbell_x", "magnet_dumbbell_z")
d_pol <- c("pitch_forearm" , "magnet_arm_y" , "magnet_forearm_x",
           "accel_dumbbell_y", "accel_forearm_x")
e_pol <- c("magnet_belt_y" , "magnet_belt_z" , "roll_belt", 
           "gyros_belt_z" , "magnet_dumbbell_y")
jp <- character()
for(c in c(a_pol,b_pol,c_pol,d_pol,e_pol)){
  jp <- union(jp,c)
}
foni_fg2 <- foni_fg%>%select(jp,classe)
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(jp)` instead of `jp` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
data.frame("arm" = sum(grepl("_arm",jp)),"forearm"=sum(grepl("_forearm",jp)),"belt"=sum(grepl("_belt",jp)),"dumbbell"=sum(grepl("_dumbbell",jp)))
##   arm forearm belt dumbbell
## 1   2       4    4        7
k_b<-function(data, mapping, ...) {
  ggplot(data = data, mapping=mapping)+geom_density(..., alpha = 0.3)+scale_fill_brewer(palette="Set2") 
}
k_n<-function(data, mapping, ...) {
  ggplot(data = data, mapping=mapping)+geom_point(..., alpha = 0.1)+ scale_fill_brewer(palette="Set2") 
}
ggpairs(foni_fg2,columns = 1:5,aes(color = classe),lower = list(continuous = k_n),diag = list(continuous = k_b))

ggpairs(foni_fg2,columns=6:10,aes(color=classe),lower=list(continuous=k_n),diag =list(continuous=k_b))

ggpairs(foni_fg2,columns = 11:17,aes(color = classe),lower=list(continuous=k_n),diag=list(continuous=k_b))

TeachF <- Teach %>% select(jp,classe)
Thv_infoF<-Val_data %>% select(jp,classe)
TeachF[,1:17] <- sapply(TeachF[,1:17],as.numeric)
Thv_infoF[,1:17] <- sapply(Thv_infoF[,1:17],as.numeric)
thm<-c("A", "B", "C", "D", "E")
abb <- preProcess(TeachF[,-18],method = c("center","scale","BoxCox"))
Se_x <- predict(abb,select(TeachF,-classe))
Se_y <- factor(TeachF$classe,levels=thm)
W_x <- predict(abb,select(Thv_infoF,-classe))
W_y<- factor(Thv_infoF$classe,levels=thm)
K_tr <- trainControl(method="cv", number=5)
KT_n <- train(x = Se_x,y = Se_y,method = "rpart", trControl = K_tr)
WF_n <- train(x = Se_x,y = Se_y, method = "rf", trControl = K_tr,verbose=FALSE, metric = "Accuracy")
ERF_n <- train(x = Se_x,y = Se_y,method = "gbm",trControl=K_tr,verbose=FALSE)
RGN_n <- svm(x = Se_x,y = Se_y,kernel = "polynomial", cost = 10)
confusionMatrix(predict(KT_n,W_x),W_y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1013  305  321  299  100
##          B   17  260   18  123   79
##          C   83  194  345  221  210
##          D    0    0    0    0    0
##          E    3    0    0    0  332
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4971          
##                  95% CI : (0.4813, 0.5128)
##     No Information Rate : 0.2845          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3428          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9077  0.34256  0.50439   0.0000  0.46047
## Specificity            0.6348  0.92509  0.78141   1.0000  0.99906
## Pos Pred Value         0.4971  0.52314  0.32764      NaN  0.99104
## Neg Pred Value         0.9454  0.85435  0.88188   0.8361  0.89158
## Prevalence             0.2845  0.19347  0.17436   0.1639  0.18379
## Detection Rate         0.2582  0.06628  0.08794   0.0000  0.08463
## Detection Prevalence   0.5195  0.12669  0.26842   0.0000  0.08539
## Balanced Accuracy      0.7713  0.63383  0.64290   0.5000  0.72977
confusionMatrix(predict(WF_n,W_x),W_y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1115   13    0    1    0
##          B    0  731    4    3    1
##          C    1   13  675   15    1
##          D    0    2    5  624    2
##          E    0    0    0    0  717
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9845          
##                  95% CI : (0.9801, 0.9881)
##     No Information Rate : 0.2845          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9803          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9991   0.9631   0.9868   0.9705   0.9945
## Specificity            0.9950   0.9975   0.9907   0.9973   1.0000
## Pos Pred Value         0.9876   0.9892   0.9574   0.9858   1.0000
## Neg Pred Value         0.9996   0.9912   0.9972   0.9942   0.9988
## Prevalence             0.2845   0.1935   0.1744   0.1639   0.1838
## Detection Rate         0.2842   0.1863   0.1721   0.1591   0.1828
## Detection Prevalence   0.2878   0.1884   0.1797   0.1614   0.1828
## Balanced Accuracy      0.9971   0.9803   0.9888   0.9839   0.9972
plot(WF_n$finalModel,main="Error VS no of tree")

confusionMatrix(predict(ERF_n,W_x),W_y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1094   47    1    6    3
##          B   12  619   37    8    3
##          C    6   59  632   47   12
##          D    4   30   14  578    8
##          E    0    4    0    4  695
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9223          
##                  95% CI : (0.9134, 0.9304)
##     No Information Rate : 0.2845          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9016          
##                                           
##  Mcnemar's Test P-Value : 2.082e-12       
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9803   0.8155   0.9240   0.8989   0.9639
## Specificity            0.9797   0.9810   0.9617   0.9829   0.9975
## Pos Pred Value         0.9505   0.9116   0.8360   0.9117   0.9886
## Neg Pred Value         0.9921   0.9568   0.9836   0.9802   0.9919
## Prevalence             0.2845   0.1935   0.1744   0.1639   0.1838
## Detection Rate         0.2789   0.1578   0.1611   0.1473   0.1772
## Detection Prevalence   0.2934   0.1731   0.1927   0.1616   0.1792
## Balanced Accuracy      0.9800   0.8983   0.9428   0.9409   0.9807
confusionMatrix(predict(RGN_n,W_x),W_y)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1106   49   15   22    3
##          B    1  664   14    3    3
##          C    4   41  643   50    5
##          D    5    2    7  565   11
##          E    0    3    5    3  699
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9373          
##                  95% CI : (0.9292, 0.9447)
##     No Information Rate : 0.2845          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9205          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9910   0.8748   0.9401   0.8787   0.9695
## Specificity            0.9683   0.9934   0.9691   0.9924   0.9966
## Pos Pred Value         0.9255   0.9693   0.8654   0.9576   0.9845
## Neg Pred Value         0.9963   0.9707   0.9871   0.9766   0.9932
## Prevalence             0.2845   0.1935   0.1744   0.1639   0.1838
## Detection Rate         0.2819   0.1693   0.1639   0.1440   0.1782
## Detection Prevalence   0.3046   0.1746   0.1894   0.1504   0.1810
## Balanced Accuracy      0.9797   0.9341   0.9546   0.9355   0.9830
Ythu2 <- Ythu %>% select(jp,problem_id)
xYthu <- Ythu2 %>% select(jp)
result <- data.frame("problem_id" = Ythu$problem_id,"PREDICTION_RF"=predict(WF_n,xYthu),"PREDICTION_GBM"=predict(ERF_n,xYthu),"PREDICTION_SVM"=predict(RGN_n,xYthu))
result
##    problem_id PREDICTION_RF PREDICTION_GBM PREDICTION_SVM
## 1           1             E              E              C
## 2           2             A              E              A
## 3           3             A              E              B
## 4           4             E              E              B
## 5           5             E              E              A
## 6           6             E              D              A
## 7           7             E              E              B
## 8           8             B              D              B
## 9           9             A              E              E
## 10         10             E              E              E
## 11         11             A              E              B
## 12         12             A              B              A
## 13         13             B              B              E
## 14         14             A              A              B
## 15         15             E              E              B
## 16         16             E              E              A
## 17         17             E              E              A
## 18         18             B              E              A
## 19         19             E              E              A
## 20         20             E              E              D